[2]:
# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
[4]:
df.head()
[4]:
| Id | MSSubClass | MSZoning | LotFrontage | LotArea | Street | Alley | LotShape | LandContour | Utilities | ... | PoolArea | PoolQC | Fence | MiscFeature | MiscVal | MoSold | YrSold | SaleType | SaleCondition | SalePrice | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 60 | RL | 65.0 | 8450 | Pave | NaN | Reg | Lvl | AllPub | ... | 0 | NaN | NaN | NaN | 0 | 2 | 2008 | WD | Normal | 208500 |
| 1 | 2 | 20 | RL | 80.0 | 9600 | Pave | NaN | Reg | Lvl | AllPub | ... | 0 | NaN | NaN | NaN | 0 | 5 | 2007 | WD | Normal | 181500 |
| 2 | 3 | 60 | RL | 68.0 | 11250 | Pave | NaN | IR1 | Lvl | AllPub | ... | 0 | NaN | NaN | NaN | 0 | 9 | 2008 | WD | Normal | 223500 |
| 3 | 4 | 70 | RL | 60.0 | 9550 | Pave | NaN | IR1 | Lvl | AllPub | ... | 0 | NaN | NaN | NaN | 0 | 2 | 2006 | WD | Abnorml | 140000 |
| 4 | 5 | 60 | RL | 84.0 | 14260 | Pave | NaN | IR1 | Lvl | AllPub | ... | 0 | NaN | NaN | NaN | 0 | 12 | 2008 | WD | Normal | 250000 |
5 rows × 81 columns
[5]:
df['MSZoning'].value_counts()
[5]:
MSZoning RL 1151 RM 218 FV 65 RH 16 C (all) 10 Name: count, dtype: int64
[6]:
df.isnull().sum()
[6]:
Id 0
MSSubClass 0
MSZoning 0
LotFrontage 259
LotArea 0
...
MoSold 0
YrSold 0
SaleType 0
SaleCondition 0
SalePrice 0
Length: 81, dtype: int64[7]:
sns.heatmap(df.isnull(),yticklabels=False,cbar=False)
[7]:
<Axes: >
[8]:
df.shape
[8]:
(1460, 81)
[9]:
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 1460 entries, 0 to 1459 Data columns (total 81 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Id 1460 non-null int64 1 MSSubClass 1460 non-null int64 2 MSZoning 1460 non-null object 3 LotFrontage 1201 non-null float64 4 LotArea 1460 non-null int64 5 Street 1460 non-null object 6 Alley 91 non-null object 7 LotShape 1460 non-null object 8 LandContour 1460 non-null object 9 Utilities 1460 non-null object 10 LotConfig 1460 non-null object 11 LandSlope 1460 non-null object 12 Neighborhood 1460 non-null object 13 Condition1 1460 non-null object 14 Condition2 1460 non-null object 15 BldgType 1460 non-null object 16 HouseStyle 1460 non-null object 17 OverallQual 1460 non-null int64 18 OverallCond 1460 non-null int64 19 YearBuilt 1460 non-null int64 20 YearRemodAdd 1460 non-null int64 21 RoofStyle 1460 non-null object 22 RoofMatl 1460 non-null object 23 Exterior1st 1460 non-null object 24 Exterior2nd 1460 non-null object 25 MasVnrType 588 non-null object 26 MasVnrArea 1452 non-null float64 27 ExterQual 1460 non-null object 28 ExterCond 1460 non-null object 29 Foundation 1460 non-null object 30 BsmtQual 1423 non-null object 31 BsmtCond 1423 non-null object 32 BsmtExposure 1422 non-null object 33 BsmtFinType1 1423 non-null object 34 BsmtFinSF1 1460 non-null int64 35 BsmtFinType2 1422 non-null object 36 BsmtFinSF2 1460 non-null int64 37 BsmtUnfSF 1460 non-null int64 38 TotalBsmtSF 1460 non-null int64 39 Heating 1460 non-null object 40 HeatingQC 1460 non-null object 41 CentralAir 1460 non-null object 42 Electrical 1459 non-null object 43 1stFlrSF 1460 non-null int64 44 2ndFlrSF 1460 non-null int64 45 LowQualFinSF 1460 non-null int64 46 GrLivArea 1460 non-null int64 47 BsmtFullBath 1460 non-null int64 48 BsmtHalfBath 1460 non-null int64 49 FullBath 1460 non-null int64 50 HalfBath 1460 non-null int64 51 BedroomAbvGr 1460 non-null int64 52 KitchenAbvGr 1460 non-null int64 53 KitchenQual 1460 non-null 
object 54 TotRmsAbvGrd 1460 non-null int64 55 Functional 1460 non-null object 56 Fireplaces 1460 non-null int64 57 FireplaceQu 770 non-null object 58 GarageType 1379 non-null object 59 GarageYrBlt 1379 non-null float64 60 GarageFinish 1379 non-null object 61 GarageCars 1460 non-null int64 62 GarageArea 1460 non-null int64 63 GarageQual 1379 non-null object 64 GarageCond 1379 non-null object 65 PavedDrive 1460 non-null object 66 WoodDeckSF 1460 non-null int64 67 OpenPorchSF 1460 non-null int64 68 EnclosedPorch 1460 non-null int64 69 3SsnPorch 1460 non-null int64 70 ScreenPorch 1460 non-null int64 71 PoolArea 1460 non-null int64 72 PoolQC 7 non-null object 73 Fence 281 non-null object 74 MiscFeature 54 non-null object 75 MiscVal 1460 non-null int64 76 MoSold 1460 non-null int64 77 YrSold 1460 non-null int64 78 SaleType 1460 non-null object 79 SaleCondition 1460 non-null object 80 SalePrice 1460 non-null int64 dtypes: float64(3), int64(35), object(43) memory usage: 924.0+ KB
[10]:
## Fill Missing Values
# LotFrontage is numeric, so its gaps are imputed with the column mean.
lot_frontage = df['LotFrontage']
df['LotFrontage'] = lot_frontage.fillna(lot_frontage.mean())
[11]:
# Alley is almost entirely missing (91 non-null rows in df.info()), so remove it.
df.drop(columns=['Alley'], inplace=True)
[12]:
# Categorical basement columns: replace NaN with the most frequent label.
for bsmt_col in ('BsmtCond', 'BsmtQual'):
    df[bsmt_col] = df[bsmt_col].fillna(df[bsmt_col].mode()[0])
[13]:
# Categorical columns: replace NaN with the most frequent label.
# NOTE(review): FireplaceQu has only 770 non-null rows in df.info(); presumably
# the gaps mean "no fireplace" rather than unknown quality - verify.
for cat_col in ('FireplaceQu', 'GarageType'):
    df[cat_col] = df[cat_col].fillna(df[cat_col].mode()[0])
[14]:
# GarageYrBlt is removed from the feature set rather than imputed.
df.drop(columns=['GarageYrBlt'], inplace=True)
[15]:
# Remaining categorical garage columns: replace NaN with the most frequent label.
for garage_col in ('GarageFinish', 'GarageQual', 'GarageCond'):
    df[garage_col] = df[garage_col].fillna(df[garage_col].mode()[0])
[16]:
# These three columns are mostly empty (7, 281 and 54 non-null rows in df.info()).
df.drop(columns=['PoolQC', 'Fence', 'MiscFeature'], inplace=True)
[17]:
df.shape
[17]:
(1460, 76)
[18]:
# Id is a row identifier, not a predictor.
df.drop(columns=['Id'], inplace=True)
[19]:
df.isnull().sum()
[19]:
MSSubClass 0
MSZoning 0
LotFrontage 0
LotArea 0
Street 0
..
MoSold 0
YrSold 0
SaleType 0
SaleCondition 0
SalePrice 0
Length: 75, dtype: int64[20]:
# Impute the two masonry-veneer columns with their most frequent value.
# NOTE(review): MasVnrType has only 588 non-null rows in df.info(), so the mode
# is assigned to more than half of the rows. Presumably the missing entries mean
# "no veneer" rather than an unknown type - verify against the data description.
df['MasVnrType']=df['MasVnrType'].fillna(df['MasVnrType'].mode()[0])
# MasVnrArea is numeric; mode()[0] is its single most common value.
df['MasVnrArea']=df['MasVnrArea'].fillna(df['MasVnrArea'].mode()[0])
[21]:
sns.heatmap(df.isnull(),yticklabels=False,cbar=False,cmap='coolwarm')
[21]:
<Axes: >
[22]:
# Replace missing BsmtExposure labels with the most frequent one.
bsmt_exposure = df['BsmtExposure']
df['BsmtExposure'] = bsmt_exposure.fillna(bsmt_exposure.mode()[0])
[23]:
sns.heatmap(df.isnull(),yticklabels=False,cbar=False,cmap='YlGnBu')
[23]:
<Axes: >
[24]:
# Replace missing BsmtFinType2 labels with the most frequent one.
bsmt_fin_type2 = df['BsmtFinType2']
df['BsmtFinType2'] = bsmt_fin_type2.fillna(bsmt_fin_type2.mode()[0])
[25]:
# Drop every row that still contains a NaN.
# Per df.info() above, BsmtFinType1 (1423 non-null) and Electrical (1459 non-null)
# were not imputed in the preceding cells, so their incomplete rows are removed
# here; the next cell reports 1422 of the original 1460 rows remaining.
df.dropna(inplace=True)
[26]:
df.shape
[26]:
(1422, 75)
[27]:
df.head()
[27]:
| MSSubClass | MSZoning | LotFrontage | LotArea | Street | LotShape | LandContour | Utilities | LotConfig | LandSlope | ... | EnclosedPorch | 3SsnPorch | ScreenPorch | PoolArea | MiscVal | MoSold | YrSold | SaleType | SaleCondition | SalePrice | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 60 | RL | 65.0 | 8450 | Pave | Reg | Lvl | AllPub | Inside | Gtl | ... | 0 | 0 | 0 | 0 | 0 | 2 | 2008 | WD | Normal | 208500 |
| 1 | 20 | RL | 80.0 | 9600 | Pave | Reg | Lvl | AllPub | FR2 | Gtl | ... | 0 | 0 | 0 | 0 | 0 | 5 | 2007 | WD | Normal | 181500 |
| 2 | 60 | RL | 68.0 | 11250 | Pave | IR1 | Lvl | AllPub | Inside | Gtl | ... | 0 | 0 | 0 | 0 | 0 | 9 | 2008 | WD | Normal | 223500 |
| 3 | 70 | RL | 60.0 | 9550 | Pave | IR1 | Lvl | AllPub | Corner | Gtl | ... | 272 | 0 | 0 | 0 | 0 | 2 | 2006 | WD | Abnorml | 140000 |
| 4 | 60 | RL | 84.0 | 14260 | Pave | IR1 | Lvl | AllPub | FR2 | Gtl | ... | 0 | 0 | 0 | 0 | 0 | 12 | 2008 | WD | Normal | 250000 |
5 rows × 75 columns
[28]:
## Handle Categorical Features
[29]:
# Names of the categorical (object-dtype) columns to be one-hot encoded.
# The order is kept exactly as originally written.
columns = [
    'MSZoning', 'Street', 'LotShape', 'LandContour',
    'Utilities', 'LotConfig', 'LandSlope', 'Neighborhood',
    'Condition2', 'BldgType', 'Condition1', 'HouseStyle',
    'SaleType', 'SaleCondition', 'ExterCond', 'ExterQual',
    'Foundation', 'BsmtQual', 'BsmtCond', 'BsmtExposure',
    'BsmtFinType1', 'BsmtFinType2', 'RoofStyle', 'RoofMatl',
    'Exterior1st', 'Exterior2nd', 'MasVnrType', 'Heating',
    'HeatingQC', 'CentralAir', 'Electrical', 'KitchenQual',
    'Functional', 'FireplaceQu', 'GarageType', 'GarageFinish',
    'GarageQual', 'GarageCond', 'PavedDrive',
]
[30]:
len(columns)
[30]:
39
[97]:
test_df.to_csv('formulatedtest.csv',index=False)
[98]:
## Combine Test Data
test_df=pd.read_csv('formulatedtest.csv')
[99]:
test_df.shape
[99]:
(1459, 74)
[100]:
test_df.head()
[100]:
| MSSubClass | MSZoning | LotFrontage | LotArea | Street | LotShape | LandContour | Utilities | LotConfig | LandSlope | ... | OpenPorchSF | EnclosedPorch | 3SsnPorch | ScreenPorch | PoolArea | MiscVal | MoSold | YrSold | SaleType | SaleCondition | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 20 | RH | 80.0 | 11622 | Pave | Reg | Lvl | AllPub | Inside | Gtl | ... | 0 | 0 | 0 | 120 | 0 | 0 | 6 | 2010 | WD | Normal |
| 1 | 20 | RL | 81.0 | 14267 | Pave | IR1 | Lvl | AllPub | Corner | Gtl | ... | 36 | 0 | 0 | 0 | 0 | 12500 | 6 | 2010 | WD | Normal |
| 2 | 60 | RL | 74.0 | 13830 | Pave | IR1 | Lvl | AllPub | Inside | Gtl | ... | 34 | 0 | 0 | 0 | 0 | 0 | 3 | 2010 | WD | Normal |
| 3 | 60 | RL | 78.0 | 9978 | Pave | IR1 | Lvl | AllPub | Inside | Gtl | ... | 36 | 0 | 0 | 0 | 0 | 0 | 6 | 2010 | WD | Normal |
| 4 | 120 | RL | 43.0 | 5005 | Pave | IR1 | HLS | AllPub | Inside | Gtl | ... | 82 | 0 | 0 | 144 | 0 | 0 | 1 | 2010 | WD | Normal |
5 rows × 74 columns
[102]:
final_df.shape
[102]:
(2881, 75)
[64]:
final_df.shape
[64]:
(2881, 75)
[103]:
# One-hot encode the categorical columns listed in `columns`.
# NOTE(review): category_onehot_multcols is defined in a cell that is not shown
# here. Judging by the shapes printed around this cell (75 -> 234 columns, 2881
# rows) it presumably operates on the combined train+test `final_df` global, so
# re-running this cell on an already-encoded frame may fail - TODO confirm.
final_df=category_onehot_multcols(columns)
MSZoning Street LotShape LandContour Utilities LotConfig LandSlope Neighborhood Condition2 BldgType Condition1 HouseStyle SaleType SaleCondition ExterCond ExterQual Foundation BsmtQual BsmtCond BsmtExposure BsmtFinType1 BsmtFinType2 RoofStyle RoofMatl Exterior1st Exterior2nd MasVnrType Heating HeatingQC CentralAir Electrical KitchenQual Functional FireplaceQu GarageType GarageFinish GarageQual GarageCond PavedDrive
[104]:
final_df.shape
[104]:
(2881, 234)
[105]:
# Keep only the first occurrence of each column name (234 -> 174 columns per
# the shapes printed before and after this cell).
# NOTE(review): the dummy columns appear to be named by bare category label
# (e.g. 'Typ', 'Attchd', 'RFn' in the df_Test preview), so the same label coming
# from two different features would collide and all but the first be discarded
# here. Confirm this loss is intended; prefixing dummies with the feature name
# would avoid it.
final_df =final_df.loc[:,~final_df.columns.duplicated()]
[106]:
final_df.shape
[106]:
(2881, 174)
[107]:
# Split the combined frame back into its train and test parts.
# The first 1422 rows are the cleaned training rows (the train frame had shape
# (1422, 75) after dropna); the remaining 1459 rows are the test set.
n_train_rows = 1422
# .copy() gives each part its own data, so later in-place edits such as
# df_Train.drop(..., inplace=True) no longer act on a slice of final_df
# (which raises SettingWithCopyWarning).
df_Train = final_df.iloc[:n_train_rows, :].copy()
df_Test = final_df.iloc[n_train_rows:, :].copy()
[72]:
df_Test.head()
[72]:
| MSSubClass | LotFrontage | LotArea | OverallQual | OverallCond | YearBuilt | YearRemodAdd | MasVnrArea | BsmtFinSF1 | BsmtFinSF2 | ... | Min1 | Min2 | Typ | Attchd | Basment | BuiltIn | CarPort | Detchd | RFn | P | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 20 | 80.0 | 11622 | 5 | 6 | 1961 | 1961 | 0.0 | 468.0 | 144.0 | ... | False | False | True | True | False | False | False | False | False | False |
| 1 | 20 | 81.0 | 14267 | 6 | 6 | 1958 | 1958 | 108.0 | 923.0 | 0.0 | ... | False | False | True | True | False | False | False | False | False | False |
| 2 | 60 | 74.0 | 13830 | 5 | 5 | 1997 | 1998 | 0.0 | 791.0 | 0.0 | ... | False | False | True | True | False | False | False | False | False | False |
| 3 | 60 | 78.0 | 9978 | 6 | 6 | 1998 | 1998 | 20.0 | 602.0 | 0.0 | ... | False | False | True | True | False | False | False | False | False | False |
| 4 | 120 | 43.0 | 5005 | 8 | 5 | 1992 | 1992 | 0.0 | 263.0 | 0.0 | ... | False | False | True | True | False | False | False | False | True | False |
5 rows × 174 columns
[73]:
df_Train.shape
[73]:
(1422, 174)
[109]:
df_Test.shape
[109]:
(1459, 173)
[110]:
# Separate the target (SalePrice) from the predictor columns.
y_train = df_Train['SalePrice']
X_train = df_Train.drop(columns=['SalePrice'])
[111]:
# Baseline model: an XGBoost regressor with library-default hyperparameters,
# fitted on the training predictors and SalePrice target.
import xgboost
# NOTE(review): the variable is called `classifier` but holds a regressor; the
# same name is reused further down for the Keras model.
classifier=xgboost.XGBRegressor()
classifier.fit(X_train,y_train)
[111]:
XGBRegressor(base_score=None, booster=None, callbacks=None,
colsample_bylevel=None, colsample_bynode=None,
colsample_bytree=None, device=None, early_stopping_rounds=None,
enable_categorical=False, eval_metric=None, feature_types=None,
gamma=None, grow_policy=None, importance_type=None,
interaction_constraints=None, learning_rate=None, max_bin=None,
max_cat_threshold=None, max_cat_to_onehot=None,
max_delta_step=None, max_depth=None, max_leaves=None,
min_child_weight=None, missing=nan, monotone_constraints=None,
multi_strategy=None, n_estimators=None, n_jobs=None,
num_parallel_tree=None, random_state=None, ...)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
XGBRegressor(base_score=None, booster=None, callbacks=None,
colsample_bylevel=None, colsample_bynode=None,
colsample_bytree=None, device=None, early_stopping_rounds=None,
enable_categorical=False, eval_metric=None, feature_types=None,
gamma=None, grow_policy=None, importance_type=None,
interaction_constraints=None, learning_rate=None, max_bin=None,
max_cat_threshold=None, max_cat_to_onehot=None,
max_delta_step=None, max_depth=None, max_leaves=None,
min_child_weight=None, missing=nan, monotone_constraints=None,
multi_strategy=None, n_estimators=None, n_jobs=None,
num_parallel_tree=None, random_state=None, ...)[78]:
# Candidate hyperparameter values for the XGBoost regressor.
# NOTE(review): presumably part of the search space for the RandomizedSearchCV
# imported in the following cell; the cell that consumes them is not shown -
# TODO confirm.
booster=['gbtree','gblinear']
base_score=[0.25,0.5,0.75,1]
[89]:
from sklearn.model_selection import RandomizedSearchCV
[ ]:
# Tuned XGBoost regressor (shallow trees, many boosting rounds).
# Differences from the legacy call, none of which change the fitted model:
#   * objective 'reg:linear' is the deprecated name of 'reg:squarederror'
#   * 'silent=True' was replaced by 'verbosity=0'
#   * 'nthread' and 'seed' are deprecated aliases of 'n_jobs' and
#     'random_state'; they were passed as None, so they are simply dropped
#   * 'missing=None' meant "treat NaN as missing"; np.nan states that explicitly
regressor = xgboost.XGBRegressor(
    base_score=0.25,
    booster='gbtree',
    colsample_bylevel=1,
    colsample_bytree=1,
    gamma=0,
    learning_rate=0.1,
    max_delta_step=0,
    max_depth=2,
    min_child_weight=1,
    missing=np.nan,
    n_estimators=900,
    n_jobs=1,
    objective='reg:squarederror',
    random_state=0,
    reg_alpha=0,
    reg_lambda=1,
    scale_pos_weight=1,
    subsample=1,
    verbosity=0,
)
[126]:
print(datasets.head())
Id SalePrice 0 1 132912.296875 1 2 147271.734375 2 3 195217.859375 3 4 193647.890625 4 5 194645.281250
[135]:
Selection deleted
##Create Sample Submission file and Submit
import glob
import os

# Reading the bare relative name raised FileNotFoundError in this environment,
# so fall back to searching the Kaggle input mount when the file is not in the
# working directory.
# NOTE(review): assumes the competition data is mounted under /kaggle/input -
# adjust the pattern if the dataset lives elsewhere.
sample_path = 'sample_submission.csv'
if not os.path.exists(sample_path):
    candidates = sorted(
        glob.glob('/kaggle/input/**/sample_submission.csv', recursive=True)
    )
    if not candidates:
        raise FileNotFoundError(
            "sample_submission.csv not found in the working directory "
            "or under /kaggle/input"
        )
    sample_path = candidates[0]

# Pair the Ids of the sample file with the model predictions (row order is
# assumed to match the test set - both use a default 0..n-1 index).
pred = pd.DataFrame(y_pred)
sub_df = pd.read_csv(sample_path)
datasets = pd.concat([sub_df['Id'], pred], axis=1)
datasets.columns = ['Id', 'SalePrice']

# DataFrame.to_csv does not create missing directories, so make sure the
# target directory exists before writing.
output_path = '/kaggle/working/sample_submission_dir/sample_submission.csv'
os.makedirs(os.path.dirname(output_path), exist_ok=True)
datasets.to_csv(output_path, index=False)
Having trouble finding a file? You'll find some useful tips in our Finding Your Files tutorial.
--------------------------------------------------------------------------- FileNotFoundError Traceback (most recent call last) Cell In[135], line 3 1 ##Create Sample Submission file and Submit 2 pred=pd.DataFrame(y_pred) ----> 3 sub_df=pd.read_csv('sample_submission.csv') 4 datasets=pd.concat([sub_df['Id'],pred],axis=1) 5 datasets.columns=['Id','SalePrice'] File /opt/conda/lib/python3.10/site-packages/pandas/io/parsers/readers.py:1026, in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend) 1013 kwds_defaults = _refine_defaults_read( 1014 dialect, 1015 delimiter, (...) 1022 dtype_backend=dtype_backend, 1023 ) 1024 kwds.update(kwds_defaults) -> 1026 return _read(filepath_or_buffer, kwds) File /opt/conda/lib/python3.10/site-packages/pandas/io/parsers/readers.py:620, in _read(filepath_or_buffer, kwds) 617 _validate_names(kwds.get("names", None)) 619 # Create the parser. 
--> 620 parser = TextFileReader(filepath_or_buffer, **kwds) 622 if chunksize or iterator: 623 return parser File /opt/conda/lib/python3.10/site-packages/pandas/io/parsers/readers.py:1620, in TextFileReader.__init__(self, f, engine, **kwds) 1617 self.options["has_index_names"] = kwds["has_index_names"] 1619 self.handles: IOHandles | None = None -> 1620 self._engine = self._make_engine(f, self.engine) File /opt/conda/lib/python3.10/site-packages/pandas/io/parsers/readers.py:1880, in TextFileReader._make_engine(self, f, engine) 1878 if "b" not in mode: 1879 mode += "b" -> 1880 self.handles = get_handle( 1881 f, 1882 mode, 1883 encoding=self.options.get("encoding", None), 1884 compression=self.options.get("compression", None), 1885 memory_map=self.options.get("memory_map", False), 1886 is_text=is_text, 1887 errors=self.options.get("encoding_errors", "strict"), 1888 storage_options=self.options.get("storage_options", None), 1889 ) 1890 assert self.handles is not None 1891 f = self.handles.handle File /opt/conda/lib/python3.10/site-packages/pandas/io/common.py:873, in get_handle(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options) 868 elif isinstance(handle, str): 869 # Check whether the filename is to be opened in binary mode. 870 # Binary mode does not support 'encoding' and 'newline'. 871 if ioargs.encoding and "b" not in ioargs.mode: 872 # Encoding --> 873 handle = open( 874 handle, 875 ioargs.mode, 876 encoding=ioargs.encoding, 877 errors=errors, 878 newline="", 879 ) 880 else: 881 # Binary mode 882 handle = open(handle, ioargs.mode) FileNotFoundError: [Errno 2] No such file or directory: 'sample_submission.csv'
[92]:
df_Test.shape
[92]:
(1459, 173)
[93]:
df_Test.head()
[93]:
| MSSubClass | LotFrontage | LotArea | OverallQual | OverallCond | YearBuilt | YearRemodAdd | MasVnrArea | BsmtFinSF1 | BsmtFinSF2 | ... | Min1 | Min2 | Typ | Attchd | Basment | BuiltIn | CarPort | Detchd | RFn | P | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 20 | 80.0 | 11622 | 5 | 6 | 1961 | 1961 | 0.0 | 468.0 | 144.0 | ... | False | False | True | True | False | False | False | False | False | False |
| 1 | 20 | 81.0 | 14267 | 6 | 6 | 1958 | 1958 | 108.0 | 923.0 | 0.0 | ... | False | False | True | True | False | False | False | False | False | False |
| 2 | 60 | 74.0 | 13830 | 5 | 5 | 1997 | 1998 | 0.0 | 791.0 | 0.0 | ... | False | False | True | True | False | False | False | False | False | False |
| 3 | 60 | 78.0 | 9978 | 6 | 6 | 1998 | 1998 | 20.0 | 602.0 | 0.0 | ... | False | False | True | True | False | False | False | False | False | False |
| 4 | 120 | 43.0 | 5005 | 8 | 5 | 1992 | 1992 | 0.0 | 263.0 | 0.0 | ... | False | False | True | True | False | False | False | False | True | False |
5 rows × 173 columns
[94]:
# df_Test may or may not still carry a SalePrice column depending on which
# cells have run (the KeyError was raised because it was absent), so drop the
# column only if it is present.
y_pred = regressor.predict(df_Test.drop(columns=['SalePrice'], errors='ignore'))
--------------------------------------------------------------------------- KeyError Traceback (most recent call last) Cell In[94], line 1 ----> 1 y_pred=regressor.predict(df_Test.drop(['SalePrice'],axis=1)) File /opt/conda/lib/python3.10/site-packages/pandas/core/frame.py:5581, in DataFrame.drop(self, labels, axis, index, columns, level, inplace, errors) 5433 def drop( 5434 self, 5435 labels: IndexLabel | None = None, (...) 5442 errors: IgnoreRaise = "raise", 5443 ) -> DataFrame | None: 5444 """ 5445 Drop specified labels from rows or columns. 5446 (...) 5579 weight 1.0 0.8 5580 """ -> 5581 return super().drop( 5582 labels=labels, 5583 axis=axis, 5584 index=index, 5585 columns=columns, 5586 level=level, 5587 inplace=inplace, 5588 errors=errors, 5589 ) File /opt/conda/lib/python3.10/site-packages/pandas/core/generic.py:4788, in NDFrame.drop(self, labels, axis, index, columns, level, inplace, errors) 4786 for axis, labels in axes.items(): 4787 if labels is not None: -> 4788 obj = obj._drop_axis(labels, axis, level=level, errors=errors) 4790 if inplace: 4791 self._update_inplace(obj) File /opt/conda/lib/python3.10/site-packages/pandas/core/generic.py:4830, in NDFrame._drop_axis(self, labels, axis, level, errors, only_slice) 4828 new_axis = axis.drop(labels, level=level, errors=errors) 4829 else: -> 4830 new_axis = axis.drop(labels, errors=errors) 4831 indexer = axis.get_indexer(new_axis) 4833 # Case for non-unique axis 4834 else: File /opt/conda/lib/python3.10/site-packages/pandas/core/indexes/base.py:7070, in Index.drop(self, labels, errors) 7068 if mask.any(): 7069 if errors != "ignore": -> 7070 raise KeyError(f"{labels[mask].tolist()} not found in axis") 7071 indexer = indexer[~mask] 7072 return self.delete(indexer) KeyError: "['SalePrice'] not found in axis"
[ ]:
pred.columns=['SalePrice']
[ ]:
temp_df=df_Train['SalePrice'].copy()
[ ]:
# temp_df is a Series (a single column taken from df_Train). Assigning to a
# non-existent `.column` attribute only attached a stray Python attribute and
# left the data untouched; the label of a Series is its `.name`.
temp_df.name = 'SalePrice'
[ ]:
# Rebind instead of dropping in place: df_Train was created as a slice of
# final_df, and mutating such a slice in place raises SettingWithCopyWarning.
# The resulting frame is the same.
df_Train = df_Train.drop(columns=['SalePrice'])
[ ]:
df_Train=pd.concat([df_Train,temp_df],axis=1)
[ ]:
df_Test.head()
[ ]:
df_Test=pd.concat([df_Test,pred],axis=1)
[ ]:
[ ]:
df_Train=pd.concat([df_Train,df_Test],axis=0)
[ ]:
df_Train.shape
[ ]:
# Rebuild predictors and target from the enlarged df_Train
# (original training rows plus the test rows carrying predicted SalePrice).
y_train = df_Train['SalePrice']
X_train = df_Train.drop(columns=['SalePrice'])
[ ]:
# Importing the Keras libraries and packages
import keras
from keras import backend as K
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LeakyReLU,PReLU,ELU
from keras.layers import Dropout


def root_mean_squared_error(y_true, y_pred):
    """Root-mean-squared-error loss: sqrt(mean((y_pred - y_true) ** 2)).

    Defined in this cell because compile() below needs it; the notebook's
    stand-alone definition sits in a later cell, so on a fresh
    "Restart & Run All" the name would otherwise not exist yet.
    """
    return K.sqrt(K.mean(K.square(y_pred - y_true)))


# Take the input width from the data instead of hard-coding 174: X_train has
# SalePrice removed, so it is one column narrower than the 174-column frame.
n_features = X_train.shape[1]

# The one-hot columns are boolean; `.values` on a mixed bool/numeric frame
# yields an object array that Keras cannot convert, so cast explicitly.
X_train_array = X_train.to_numpy(dtype='float32')
y_train_array = y_train.to_numpy(dtype='float32')

# Initialising the ANN
classifier = Sequential()
# Adding the input layer and the first hidden layer
# (`units` / `kernel_initializer` replace the Keras 1 names `output_dim` / `init`)
classifier.add(Dense(units=50, kernel_initializer='he_uniform', activation='relu', input_dim=n_features))
# Adding the second hidden layer
classifier.add(Dense(units=25, kernel_initializer='he_uniform', activation='relu'))
# Adding the third hidden layer
classifier.add(Dense(units=50, kernel_initializer='he_uniform', activation='relu'))
# Adding the output layer (single linear unit: predicted SalePrice)
classifier.add(Dense(units=1, kernel_initializer='he_uniform'))
# Compiling the ANN
classifier.compile(loss=root_mean_squared_error, optimizer='Adamax')
# Fitting the ANN to the Training set (`epochs` replaces the Keras 1 name `nb_epoch`)
model_history = classifier.fit(
    X_train_array,
    y_train_array,
    validation_split=0.20,
    batch_size=10,
    epochs=1000,
)
[ ]:
# Predict with the ANN on the test predictors. The one-hot columns are boolean,
# and `.values` on a mixed bool/numeric frame yields an object array that Keras
# cannot convert, so cast to float32 explicitly.
ann_pred = classifier.predict(
    df_Test.drop(columns=['SalePrice']).to_numpy(dtype='float32')
)
[ ]:
from keras import backend as K
def root_mean_squared_error(y_true, y_pred):
    """Root-mean-squared-error loss.

    Computes sqrt(mean((y_pred - y_true) ** 2)) with the Keras backend `K`
    imported at the top of this cell.

    Args:
        y_true: target values.
        y_pred: predicted values.

    Returns:
        The RMSE over all elements, as produced by K.sqrt.
    """
    return K.sqrt(K.mean(K.square(y_pred - y_true)))
Notebook cell shifted down successfully
